scom-cam colab

# Setup chunk: attach packages, reset the workspace, then load project helpers.
if (TRUE) {
  # load packages
  library("tidyverse")
  # library() stops with an error when gridExtra is not installed; require()
  # would only return FALSE and let the failure surface later.
  library(gridExtra)
  library(grid)
  #library("quarto")
  library("irr")
  # clear workspace (done before source() so whatever it defines is kept)
  rm(list = ls())
  # load functions
  source("../src/functions.R")
}
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.0     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.1.8
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Loading required package: gridExtra


Attaching package: 'gridExtra'


The following object is masked from 'package:dplyr':

    combine


Loading required package: lpSolve
#
#dt03 |> as_tibble()

# load dataset, rename cols, country
# Tab-separated file with a header row (despite the .csv extension).
fn <- "../csv/haidi-data-231012.csv"
#data = read.table(fn, sep='\t', quote="", header=F, strip.white=TRUE, stringsAsFactors=FALSE) |> as_tibble() |> rename_with(~ cn, all_of(paste0(rep("V",34), seq(1,34))))
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, the full names are reserved words.
data <- read.table(
  fn,
  sep = "\t",
  header = TRUE,
  strip.white = TRUE,
  stringsAsFactors = FALSE
) |>
  as_tibble()

#
#data = data |> 
#mutate(v35_country = as.numeric(str_extract(data$v03_news.source, "^."))) |> 
#mutate(v35_country = ifelse(v35_country==1,"se", ifelse(v35_country==2,"dk", ifelse(v35_country==3,"fi",NA))))

# clean data
# Columns 18-21: keep only the leading run of digits and store it as numeric
# (entries that do not start with a digit become NA).
data <- data |>
  mutate(across(18:21, \(col) as.numeric(str_extract(col, "^\\d+")))) |>
  # column 33 gets its descriptive name
  rename(v33_power.sum.index = 33)

# 
#write.table(data, "../csv/haidi-data-231012.csv", sep="\t", quot=T, row.names=F)

231025: color theme

# Attach RColorBrewer for its colour palettes.
library(RColorBrewer)

#q + scale_colour_brewer(palette = "Blues")
# Draw an overview of the RColorBrewer palettes to pick one from.
display.brewer.all()

# custom theme
# Thicker major grid lines. ggplot2 >= 3.4.0 sets line thickness with
# `linewidth`; the `size` argument of element_line() is deprecated there.
some_graph <- theme(panel.grid.major = element_line(linewidth = 2))
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
# Three named R colours used for the manual colour scale below.
some_color <- c("deeppink", "chartreuse", "midnightblue")
# put the elements in a list, so the theme tweak and the colour scale can be
# added to a plot together as `+ theme_haidi`
theme_haidi <- list(some_graph, scale_color_manual(values=some_color))

231019: descriptives

# Count rows per news source and country, then attach the matching dt03 rows
# (dt03 is defined outside this chunk; it is joined on its V2 column).
pd <- data |>
  group_by(v03_news.source, v35_country) |>
  # Drop the grouping explicitly: this silences the summarise() message and
  # lets the mutate() below work on the whole table instead of per source.
  summarize(count = n(), .groups = "drop") |>
  left_join(dt03, by = join_by(v03_news.source == V2)) |>
  mutate(cn = paste(v35_country, V3), cc = as.factor(v35_country))
`summarise()` has grouped output by 'v03_news.source'. You can override using
the `.groups` argument.
# Dodged bars: one cluster per country, one bar per country/source label.
p1 <- ggplot(pd, aes(x = v35_country, y = count, fill = cn)) +
  geom_col(position = position_dodge()) +
  scale_fill_discrete(name = pd$V1[1]) +
  labs(
    y = "count",
    x = "v35_country",
    title = "v03_news.source by v35_country"
  ) +
  theme_minimal()

# alternatives tried earlier:
#    scale_fill_discrete(name=pd$V1[1], labels=V3)
#    scale_fill_manual(values=pd$cx)
p1

# Box plot of column 33 (power sum index) by column 35 (country).
pd <- data |>
  select(35, 33) |>
  rename(v35 = 1, v33 = 2)
p1 <- ggplot(pd, aes(x = v35, y = v33, fill = v35)) +
  geom_boxplot(alpha = 1.0) +
  # Mark the group means. With only `fun` supplied, the default "pointrange"
  # geom has no ymin/ymax and warns about removed rows; a plain point avoids
  # that. size = 2 approximates the default pointrange marker.
  stat_summary(fun = mean, geom = "point", size = 2) +
  labs(
    y = "v33_power.sum.index",
    x = "v35_country",
    title = "v33_power.sum.index by v35_country"
  ) +
  theme(legend.position = "none")

#
p1
Warning: Removed 3 rows containing missing values (`geom_segment()`).

# Call desc_get() for variables 33 and 34, each with the object named dt<i>.
# desc_get() is defined outside this chunk (presumably ../src/functions.R -
# confirm). Only the result of the last iteration stays in `pl`.
for (i in 33:34) {
  pl <- desc_get(data, get(sprintf("dt%d", i)), i)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

231018: descriptives

# Call desc_get() for variables 22 to 32, each with the object named dt<i>.
# Only the result of the last iteration stays in `pl`.
for (i in 22:32) {
  pl <- desc_get(data, get(sprintf("dt%d", i)), i)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

# Call desc_get() for variables 18 to 21, each with the object named dt<i>.
# Only the result of the last iteration stays in `pl`.
for (i in 18:21) {
  pl <- desc_get(data, get(sprintf("dt%d", i)), i)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

231017: descriptives

# Call desc_get() for variables 12 to 15, each with the object named dt<i>.
# Only the result of the last iteration stays in `pl`.
for (i in 12:15) {
  pl <- desc_get(data, get(sprintf("dt%d", i)), i)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

231016: descriptives

# Call desc_get() for variables 6 to 9; these objects carry a zero-padded
# index in their name (dt06 ... dt09).
# Only the result of the last iteration stays in `pl`.
for (i in 6:9) {
  pl <- desc_get(data, get(sprintf("dt%02d", i)), i)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

# Same call for variable 4, using the dt04 object directly.
pl <- desc_get(data, dt04, 4)
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.

#grid.arrange(pl[1], pl[2], ncol=2, widths=c(3, 4))

231012: descriptives

# plot data, grouped
# Article-size counts within each country, with shares and label positions.
pdg <- data |>
  group_by(v35_country, v05_article.size) |>
  # Keep the country grouping on purpose (and say so explicitly): the shares
  # and label positions below are computed within each country.
  summarize(count = n(), .groups = "drop_last") |>
  mutate(
    prop = count / sum(count),
    # Label position = midpoint of each segment when the segments are
    # stacked with the first row on top and the last row at the bottom.
    prop_lab = rev(cumsum(rev(prop))) - prop / 2,
    count_lab = rev(cumsum(rev(count))) - count / 2
  ) |>
  # grouping is no longer needed once the per-country values exist
  ungroup() |>
  left_join(
    dt05 |> as_tibble() |> arrange(V2),
    by = join_by(v05_article.size == V2),
    keep = TRUE
  )
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
#
# p1: 100% stacked bars per country, labelled with the within-country share.
p1 <- ggplot(data = pdg, aes(fill = as.factor(V2), y = count, x = v35_country)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  # earlier variant:
  #    geom_text(aes(label=paste0(prop*100,"%")), position=position_stack(vjust=0.5), size=2)
  geom_text(
    aes(label = paste0(round(100 * prop, 1), "%"), y = prop_lab),
    size = 3
  ) +
  scale_fill_discrete(name = dt05$V1[1], labels = dt05$V3)

# p2: stacked counts per country, labelled with the raw count.
p2 <- ggplot(data = pdg, aes(fill = as.factor(V2), y = count, x = v35_country)) +
  geom_col(position = "stack") +
  geom_text(aes(label = count, y = count_lab), size = 3) +
  scale_fill_discrete(name = dt05$V1[1], labels = dt05$V3)

# p3: stacked counts per country, labelled with the share; legend hidden.
p3 <- ggplot(data = pdg, aes(fill = as.factor(V2), y = count, x = v35_country)) +
  geom_col(position = "stack") +
  #    geom_text(aes(label=count, y=count_lab), size=3) +
  geom_text(
    aes(label = paste0(round(100 * prop, 1), "%"), y = count_lab),
    size = 3
  ) +
  #    scale_fill_discrete(name=dt05$V1[1], labels=dt05$V3)
  scale_fill_discrete(guide = "none")

# plot data, combined
# Same tabulation as the per-country version, pooled over all rows.
pdc <- data |>
  count(v05_article.size, name = "count") |>
  mutate(
    prop = count / sum(count),
    prop_lab = rev(cumsum(rev(prop))) - prop / 2,
    count_lab = rev(cumsum(rev(count))) - count / 2
  ) |>
  left_join(
    as_tibble(dt05),
    by = join_by(v05_article.size == V2),
    keep = TRUE
  )

# p4: single 100% stacked bar for the pooled data.
p4 <- ggplot(data = pdc, aes(fill = as.factor(V2), y = count, x = "combined")) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  geom_text(
    aes(label = paste0(round(100 * prop, 1), "%"), y = prop_lab),
    size = 3
  ) +
  scale_fill_discrete(name = dt05$V1[1], labels = dt05$V3) +
  labs(y = "percentage", x = "v35_country")

#par(mfrow = c(1,2))
#pdf("../fig/foo.pdf")#png()
# pooled chart on the left, per-country chart on the right (widths 3:4)
grid.arrange(p4, p3, ncol = 2, widths = c(3, 4))

#dev.off()
  • combined dataset link

231009: dk icr re-test

# load dataset
# Headerless tab-separated coding sheet; read.table names the columns V1, V2, ...
fn <- "../csv/haidi-wp1-coding-dk-2.tsv"
#data = read.table(fn, sep='\t', header=F, strip.white=TRUE, stringsAsFactors=FALSE, quote="")
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
data <- read.table(
  fn,
  sep = "\t",
  header = FALSE,
  strip.white = TRUE,
  stringsAsFactors = FALSE
) |>
  as_tibble()

# select data
data <- data |>
  na.omit() |>                                # drop rows with any NA
  select(-2) |>                               # remove the second column
  select(last_col(), 1:29) |>                 # move the last column to the front
  rename(coder_id = V31, content_id = V1) |>
  # content_id for both coders, copied into `some` for the later pivot
  mutate(some = content_id)
# clean data
#data$V3 = data$V3 |> str_extract("\\d+") |> as.numeric()

analyze data

# 
# Save the long-format data before reshaping. `quote` is written out in full;
# the former `quot=` only worked through partial argument matching.
write.table(
  data,
  "../csv/haidi-dk-2.tsv",
  sep = "\t",
  quote = TRUE,
  row.names = FALSE
)
# transform data to wide (10 content units * 28 content vars):
# one row per coder, one column per variable/content-unit pair
data <- data |>
  pivot_wider(id_cols = coder_id, names_from = some, values_from = 3:30) |>
  select(-coder_id)
#
data
# A tibble: 2 × 280
  V3_DK001 V3_DK002 V3_DK003 V3_DK004 V3_DK005 V3_DK006 V3_DK007 V3_DK…¹ V3_DK…²
     <int>    <int>    <int>    <int>    <int>    <int>    <int>   <int>   <int>
1      208      206      208      206      206      206      206     206     206
2      208      206      208      206      206      206      206     206     206
# … with 271 more variables: V3_DK010 <int>, V4_DK001 <int>, V4_DK002 <int>,
#   V4_DK003 <int>, V4_DK004 <int>, V4_DK005 <int>, V4_DK006 <int>,
#   V4_DK007 <int>, V4_DK008 <int>, V4_DK009 <int>, V4_DK010 <int>,
#   V5_DK001 <int>, V5_DK002 <int>, V5_DK003 <int>, V5_DK004 <int>,
#   V5_DK005 <int>, V5_DK006 <int>, V5_DK007 <int>, V5_DK008 <int>,
#   V5_DK009 <int>, V5_DK010 <int>, V6_DK001 <int>, V6_DK002 <int>,
#   V6_DK003 <int>, V6_DK004 <int>, V6_DK005 <int>, V6_DK006 <int>, …
# inter rater reliability
# kripp.alpha() works on a matrix, so convert the wide tibble first
data <- data |> as.matrix()
# `method` names the measurement level: nominal, ordinal, interval, ratio
kripp.alpha(data, method = "nominal")
 Krippendorff's alpha

 Subjects = 280 
   Raters = 2 
    alpha = 0.704 
# write data
# Output is tab-separated even though the file name ends in .csv.
fn <- "../csv/haidi-wp1-coding-dk-2-wide.csv"
# `quote` written out in full (`quot=` relied on partial argument matching)
write.table(data, fn, sep = "\t", quote = TRUE, row.names = FALSE)
  • dataset wide format link

231004: all

# bash code chunk: take the header row of haidi-all.tsv, put each column name
# on its own line, number the lines, and show the first ten
head -n1 ../csv/haidi-all.tsv | tr '\t' '\n' | cat -n | head
     1  "coder_id"
     2  "content_id"
     3  "V3"
     4  "V4"
     5  "V5"
     6  "V6"
     7  "V7"
     8  "V8"
     9  "V9"
    10  "V12"
# load datasets
# Combined file with a header row.
fn <- "../csv/haidi-all.tsv"
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
data <- read.table(
  fn,
  sep = "\t",
  header = TRUE,
  strip.white = TRUE,
  stringsAsFactors = FALSE
) |>
  as_tibble()

# Rows per content unit and coder. `.groups` states the default grouping
# explicitly, which keeps the result the same and silences the message.
data |>
  group_by(content_id, coder_id) |>
  summarize(count = n(), .groups = "drop_last")
`summarise()` has grouped output by 'content_id'. You can override using the
`.groups` argument.
# A tibble: 60 × 3
# Groups:   content_id [40]
   content_id coder_id count
   <chr>      <chr>    <int>
 1 DK001      A            1
 2 DK001      B            1
 3 DK002      A            1
 4 DK002      B            1
 5 DK003      A            1
 6 DK003      B            1
 7 DK004      A            1
 8 DK004      B            1
 9 DK005      A            1
10 DK005      B            1
# … with 50 more rows
# Frequency of each V4 code.
data |>
  count(V4, name = "count")
# A tibble: 4 × 2
     V4 count
  <int> <int>
1     1    44
2     2    10
3     3     3
4     4     3
# Bar chart of the V4 frequencies.
data |>
  count(V4, name = "count") |>
  ggplot(aes(x = V4, y = count)) +
  #  geom_bar(fill="green", stat="identity") +
  geom_col() +
  theme_minimal()

# V4 frequencies in descending code order, with the percentage share and the
# cumulative midpoint used to place each label.
data |>
  count(V4, name = "count") |>
  arrange(desc(V4)) |>
  mutate(
    prop = round(count * 100 / sum(count), 1),
    lab.ypos = cumsum(prop) - 0.5 * prop
  )
# A tibble: 4 × 4
     V4 count  prop lab.ypos
  <int> <int> <dbl>    <dbl>
1     4     3   5        2.5
2     3     3   5        7.5
3     2    10  16.7     18.4
4     1    44  73.3     63.4
# Pie chart: a single stacked bar of the V4 shares drawn in polar coordinates.
data |>
  count(V4, name = "count") |>
  arrange(desc(V4)) |>
  mutate(
    prop = round(count * 100 / sum(count), 1),
    lab.ypos = cumsum(prop) - 0.5 * prop
  ) |>
  ggplot(aes(x = "", y = prop, fill = V4)) +
  geom_col(width = 1, color = "white") +
  geom_text(aes(y = lab.ypos, label = prop), color = "white") +
  coord_polar("y", start = 0) +
  theme_minimal()

231003: all datasets

# load datasets
# Read the three per-country files written by the country sections
# (each has a header row).
fn <- "../csv/haidi-dk.tsv"
data_dk <- read.table(
  fn, sep = "\t", header = TRUE, strip.white = TRUE, stringsAsFactors = FALSE
)
fn <- "../csv/haidi-fi.tsv"
data_fi <- read.table(
  fn, sep = "\t", header = TRUE, strip.white = TRUE, stringsAsFactors = FALSE
)
fn <- "../csv/haidi-se.tsv"
data_se <- read.table(
  fn, sep = "\t", header = TRUE, strip.white = TRUE, stringsAsFactors = FALSE
)

# Stack the rows; `data` stays a data.frame, the tibble is only for display.
data <- rbind(data_dk, data_fi, data_se)
as_tibble(data)
# A tibble: 60 × 31
   coder_id conten…¹    V3    V4    V5    V6    V7    V8    V9   V12   V13   V14
   <chr>    <chr>    <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
 1 A        DK001      208     4     3     2     1     0    99     0     0     0
 2 A        DK002      206     4     3     2     4     0    99     0     0     0
 3 A        DK003      208     1     2     0    99     0    99     0     0     0
 4 A        DK004      206     1     3     1     3     0    99     0     0     1
 5 A        DK005      206     1     2     0    99     0    99     0     0     0
 6 A        DK006      206     1     3     1     3     0    99     0     0     0
 7 A        DK007      206     1     3     0    99     0    99     0     0     0
 8 A        DK008      206     1     2     1     3     0    99     0     0     0
 9 A        DK009      206     1     2     1     3     0    99     1     0     0
10 A        DK010      206     1     1     0    99    99    99     0     0     0
# … with 50 more rows, 19 more variables: V15 <int>, V18 <int>, V19 <int>,
#   V20 <int>, V21 <int>, V22 <int>, V23 <int>, V24 <int>, V25 <int>,
#   V26 <int>, V27 <int>, V28 <int>, V29 <int>, V30 <int>, V31 <int>,
#   V32 <int>, V33 <int>, V34 <int>, some <chr>, and abbreviated variable name
#   ¹​content_id
# 
# Save the combined data. `quote` is written out in full; the former `quot=`
# only worked through partial argument matching.
write.table(
  data,
  "../csv/haidi-all.tsv",
  sep = "\t",
  quote = TRUE,
  row.names = FALSE
)

230525: finnish dataset

# load dataset
# Headerless tab-separated coding sheet; quote handling is switched off so
# quote characters in the text are read literally.
fn <- "../csv/haidi-wp1-coding-fi.tsv"
#data = read.table(fn, sep='\t', header=F, strip.white=TRUE, stringsAsFactors=FALSE)
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
data <- read.table(
  fn,
  sep = "\t",
  header = FALSE,
  quote = "",
  strip.white = TRUE,
  stringsAsFactors = FALSE
)
# select data
data <- data |>
  as_tibble() |>
  na.omit() |>                                # drop rows with any NA
  select(-c(2, 10, 11, 16, 17)) |>
  select(last_col(), 1:29) |>                 # move the last column to the front
  rename(coder_id = V35, content_id = V1) |>
  mutate(
    # content_id for both coders, copied into `some` for the later pivot
    some = content_id,
    # clean data: keep the first run of digits in V3 as a number
    V3 = as.numeric(str_extract(V3, "\\d+"))
  )

analyze data

# 
# Save the long-format data before reshaping. `quote` is written out in full;
# the former `quot=` only worked through partial argument matching.
write.table(
  data,
  "../csv/haidi-fi.tsv",
  sep = "\t",
  quote = TRUE,
  row.names = FALSE
)
# transform data to wide (10 content units * 28 content vars):
# one row per coder, one column per variable/content-unit pair
data <- data |>
  pivot_wider(id_cols = coder_id, names_from = some, values_from = 3:30) |>
  select(-coder_id)
#
data
# A tibble: 2 × 280
  V3_FI001 V3_FI002 V3_FI003 V3_FI004 V3_FI005 V3_FI006 V3_FI007 V3_FI…¹ V3_FI…²
     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>   <dbl>   <dbl>
1      315      311      311      313      314      315      315     314     315
2      315      311      311      313      314      315      315     314     315
# … with 271 more variables: V3_FI010 <dbl>, V4_FI001 <int>, V4_FI002 <int>,
#   V4_FI003 <int>, V4_FI004 <int>, V4_FI005 <int>, V4_FI006 <int>,
#   V4_FI007 <int>, V4_FI008 <int>, V4_FI009 <int>, V4_FI010 <int>,
#   V5_FI001 <int>, V5_FI002 <int>, V5_FI003 <int>, V5_FI004 <int>,
#   V5_FI005 <int>, V5_FI006 <int>, V5_FI007 <int>, V5_FI008 <int>,
#   V5_FI009 <int>, V5_FI010 <int>, V6_FI001 <int>, V6_FI002 <int>,
#   V6_FI003 <int>, V6_FI004 <int>, V6_FI005 <int>, V6_FI006 <int>, …
# inter rater reliability
# kripp.alpha() works on a matrix, so convert the wide tibble first
data <- data |> as.matrix()
# `method` names the measurement level: nominal, ordinal, interval, ratio
kripp.alpha(data, method = "nominal")
 Krippendorff's alpha

 Subjects = 280 
   Raters = 2 
    alpha = 0.72 
# write data
# Output is tab-separated even though the file name ends in .csv.
fn <- "../csv/haidi-wp1-coding-fi-wide.csv"
# `quote` written out in full (`quot=` relied on partial argument matching)
write.table(data, fn, sep = "\t", quote = TRUE, row.names = FALSE)
  • dataset wide format link

230523: danish dataset

# load dataset
# Headerless tab-separated coding sheet.
fn <- "../csv/haidi-wp1-coding-dk.tsv"
# The file is read once, with quote handling off. The earlier extra read
# without quote="" was overwritten straight away and has been removed.
data <- read.table(
  fn,
  sep = "\t",
  header = FALSE,
  quote = "",
  strip.white = TRUE,
  stringsAsFactors = FALSE
)
# select data
data <- data |>
  as_tibble() |>
  na.omit() |>                                # drop rows with any NA
  select(-c(2, 10, 11, 16, 17)) |>
  select(last_col(), 1:29) |>                 # move the last column to the front
  rename(coder_id = V35, content_id = V1) |>
  mutate(
    # content_id for both coders, copied into `some` for the later pivot
    some = content_id,
    # clean data: keep the leading digits of V18 as a number
    V18 = as.numeric(str_extract(V18, "^\\d+"))
  )

analyze data

# 
# Save the long-format data before reshaping. `quote` is written out in full;
# the former `quot=` only worked through partial argument matching.
write.table(
  data,
  "../csv/haidi-dk.tsv",
  sep = "\t",
  quote = TRUE,
  row.names = FALSE
)
# transform data to wide (10 content units * 28 content vars):
# one row per coder, one column per variable/content-unit pair
data <- data |>
  pivot_wider(id_cols = coder_id, names_from = some, values_from = 3:30) |>
  select(-coder_id)
#
data
# A tibble: 2 × 280
  V3_DK001 V3_DK002 V3_DK003 V3_DK004 V3_DK005 V3_DK006 V3_DK007 V3_DK…¹ V3_DK…²
     <int>    <int>    <int>    <int>    <int>    <int>    <int>   <int>   <int>
1      208      206      208      206      206      206      206     206     206
2      208      206      208      206      206      206      206     206     206
# … with 271 more variables: V3_DK010 <int>, V4_DK001 <int>, V4_DK002 <int>,
#   V4_DK003 <int>, V4_DK004 <int>, V4_DK005 <int>, V4_DK006 <int>,
#   V4_DK007 <int>, V4_DK008 <int>, V4_DK009 <int>, V4_DK010 <int>,
#   V5_DK001 <int>, V5_DK002 <int>, V5_DK003 <int>, V5_DK004 <int>,
#   V5_DK005 <int>, V5_DK006 <int>, V5_DK007 <int>, V5_DK008 <int>,
#   V5_DK009 <int>, V5_DK010 <int>, V6_DK001 <int>, V6_DK002 <int>,
#   V6_DK003 <int>, V6_DK004 <int>, V6_DK005 <int>, V6_DK006 <int>, …
# inter rater reliability
# kripp.alpha() works on a matrix, so convert the wide tibble first
data <- data |> as.matrix()
# `method` names the measurement level: nominal, ordinal, interval, ratio
kripp.alpha(data, method = "nominal")
 Krippendorff's alpha

 Subjects = 280 
   Raters = 2 
    alpha = 0.639 
# write data
# Output is tab-separated even though the file name ends in .csv.
fn <- "../csv/haidi-wp1-coding-dk-wide.csv"
# `quote` written out in full (`quot=` relied on partial argument matching)
write.table(data, fn, sep = "\t", quote = TRUE, row.names = FALSE)
  • dataset wide format link

230515: swedish dataset

# load dataset
# Headerless tab-separated coding sheet.
fn <- "../csv/some.tsv"
# The file is read once, with quote handling off. The earlier extra read
# without quote="" was overwritten straight away and has been removed.
data <- read.table(
  fn,
  sep = "\t",
  header = FALSE,
  quote = "",
  strip.white = TRUE,
  stringsAsFactors = FALSE
)
# select data
data <- data |>
  as_tibble() |>
  select(-c(2, 10, 11, 16, 17)) |>
  select(last_col(), 1:29) |>                 # move the last column to the front
  rename(coder_id = V35, content_id = V1)
# duplicate content_id for both coders: coder A's ids are repeated twice.
# NOTE(review): this assumes coder A's rows come first and every other coder
# has the same number of rows in the same order - confirm against the file.
data$some <- rep(data$content_id[data$coder_id == "A"], 2)

230516: analyze data

# 
# Save the long-format data before reshaping. `quote` is written out in full;
# the former `quot=` only worked through partial argument matching.
write.table(
  data,
  "../csv/haidi-se.tsv",
  sep = "\t",
  quote = TRUE,
  row.names = FALSE
)
# transform data to wide (10 content units * 28 content vars):
# one row per coder, one column per variable/content-unit pair
data <- data |>
  pivot_wider(id_cols = coder_id, names_from = some, values_from = 3:30) |>
  select(-coder_id)
#
data
# A tibble: 2 × 280
  V3_S001 V3_S002 V3_S003 V3_S004 V3_S005 V3_S006 V3_S007 V3_S008 V3_S009
    <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>
1     102     103     102     101     104     102     103     101     101
2     102     103     102     101     104     102     103     101     101
# … with 271 more variables: V3_S010 <int>, V4_S001 <int>, V4_S002 <int>,
#   V4_S003 <int>, V4_S004 <int>, V4_S005 <int>, V4_S006 <int>, V4_S007 <int>,
#   V4_S008 <int>, V4_S009 <int>, V4_S010 <int>, V5_S001 <int>, V5_S002 <int>,
#   V5_S003 <int>, V5_S004 <int>, V5_S005 <int>, V5_S006 <int>, V5_S007 <int>,
#   V5_S008 <int>, V5_S009 <int>, V5_S010 <int>, V6_S001 <int>, V6_S002 <int>,
#   V6_S003 <int>, V6_S004 <int>, V6_S005 <int>, V6_S006 <int>, V6_S007 <int>,
#   V6_S008 <int>, V6_S009 <int>, V6_S010 <int>, V7_S001 <int>, …
# inter rater reliability
# kripp.alpha() works on a matrix, so convert the wide tibble first
data <- data |> as.matrix()
# `method` names the measurement level: nominal, ordinal, interval, ratio
kripp.alpha(data, method = "nominal")
 Krippendorff's alpha

 Subjects = 280 
   Raters = 2 
    alpha = 0.784 
# write data
# Output is tab-separated even though the file name ends in .csv.
fn <- "../csv/test.csv"
# `quote` written out in full (`quot=` relied on partial argument matching)
write.table(data, fn, sep = "\t", quote = TRUE, row.names = FALSE)
  • dataset wide format link

230426: sample dataset

# load dataset
# Read sheet "Blad1" and name its 34 columns x1 ... x34.
data <- readxl::read_excel(
  "../csv/some.xlsx",
  sheet = "Blad1",
  col_names = paste0("x", seq_len(34))
)
# select data: label the first five rows as coder "a" and the next five as
# coder "b", number the content units 1-5 within each coder, then keep the
# two ids plus columns 1, 3, 9 and 10
data <- data |>
  mutate(
    coder_id = rep(c("a", "b"), each = 5),
    content_id = rep(seq_len(5), times = 2)
  ) |>
  select("coder_id", "content_id", 1, 3, 9, 10) |>
  print(n = 100)
# A tibble: 10 × 6
   coder_id content_id x1       x3    x9 x10                                    
   <chr>         <int> <chr> <dbl> <dbl> <chr>                                  
 1 a                 1 S001    102    99 Dolda larmsiffrorna: Så dåligt mår 85-…
 2 a                 2 S002    103    99 Satsningar som räddar liv              
 3 a                 3 S003    102    99 Detta måste ni rätta till i vården, po…
 4 a                 4 S004    101    99 Sju utmaningar - därför är det kris i …
 5 a                 5 S005    104    99 De kommande årens satsningar sker i pr…
 6 b                 1 S006    102     0 De har full koll på senioren           
 7 b                 2 S007    103    99 Mossig kritik mot vårdappar            
 8 b                 3 S008    101    99 Folksjukdomar som kan förvärras i spår…
 9 b                 4 S009    101    99 Så vill regeringen möta utmaningarna i…
10 b                 5 S010    105    99 Tekniken ska avlasta personalen        
# transform data
# One row per coder, one column per content unit, holding the x3 codes.
data <- data |>
  pivot_wider(
    id_cols = coder_id,
    names_from = content_id,
    values_from = x3
  ) |>
  select(!coder_id)
# https://rpubs.com/jacoblong/content-analysis-krippendorff-alpha-R
data
# A tibble: 2 × 5
    `1`   `2`   `3`   `4`   `5`
  <dbl> <dbl> <dbl> <dbl> <dbl>
1   102   103   102   101   104
2   102   103   101   101   105

230427: analyze data

# inter rater reliability
# kripp.alpha() works on a matrix, so convert the wide tibble first
data <- data |> as.matrix()
# `method` names the measurement level: nominal, ordinal, interval, ratio
kripp.alpha(data, method = "nominal")
 Krippendorff's alpha

 Subjects = 5 
   Raters = 2 
    alpha = 0.526 
  • Krippendorff’s alpha ranges from -1 to 1: 1 means perfect agreement between the raters, 0 means agreement no better than chance, and negative values indicate systematic disagreement. Following Krippendorff, alphas above 0.8 indicate very good agreement, and tentative conclusions can still be drawn from data where α ≥ 0.667.

sample data

# get some data
# Toy data set: two coders (A, B) rating the same five content units on a
# numeric, a categorical and a logical variable. Built column by column.
data <- tibble(
  content_id = rep(c(1, 2, 3, 4, 5), times = 2),
  coder_id = rep(c("A", "B"), each = 5),
  var1 = c(1, 3, 5, 7, 1, 1, 3, 3, 7, 3),
  var2 = c(
    "Red", "Blue", "Blue", "Green", "Red",
    "Red", "Blue", "Green", "Green", "Red"
  ),
  var3 = c(
    FALSE, TRUE, TRUE, TRUE, FALSE,
    FALSE, FALSE, FALSE, TRUE, FALSE
  )
)

print(data, n = 100)
# A tibble: 10 × 5
   content_id coder_id  var1 var2  var3 
        <dbl> <chr>    <dbl> <chr> <lgl>
 1          1 A            1 Red   FALSE
 2          2 A            3 Blue  TRUE 
 3          3 A            5 Blue  TRUE 
 4          4 A            7 Green TRUE 
 5          5 A            1 Red   FALSE
 6          1 B            1 Red   FALSE
 7          2 B            3 Blue  FALSE
 8          3 B            3 Green FALSE
 9          4 B            7 Green TRUE 
10          5 B            3 Red   FALSE

exclude

# Stop knitting here; everything after this chunk is left out of the output.
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
if (TRUE) {
  knitr::knit_exit()
}